# Replication Code for "Shifting Sands"
# Last code revision: Dec 6, 2012
# Author: Brian Levey <brian.p.levey@gmail.com>

# Libraries
library(MASS)
library(rms)
library(ROCR)

setwd('c:\\code\\shifting sands') # set to current working directory

# Function to simulate model coefficients (adapted from Gelman and Hill)
sim <- function(object, n.sims=100){
	beta.hat <- object$coef
	sd.beta <- sqrt(diag(object$var))
	corr.beta <- cov2cor(vcov(object))
    n <- length(object$y)
	k <- object$stats[4] + 1
	V.beta <- corr.beta * array(sd.beta,c(k,k)) * t(array(sd.beta,c(k,k)))
	beta <- array (NA, c(n.sims,k))
	dimnames(beta) <- list (NULL, names(beta.hat))
	for (s in 1:n.sims){
		beta[s,] <- mvrnorm (1, beta.hat, V.beta)
	}
    # Added by Masanao
    beta2 <- array (0, c(n.sims,length(coefficients(object))))
    dimnames(beta2) <- list (NULL, names(coefficients(object)))
    beta2[,dimnames(beta2)[[2]]%in%dimnames(beta)[[2]]] <- beta
    ans <- list(coef = beta2)
    return(ans)
    }

# Function to extract S.E.	
se <- function(x){sd(x) / sqrt(length(x)-1)}

# Function to calculate .95 confidence interval
conf95 <- function(x,ste,len){x + qt(c(.025, .975), (len-1))*ste}

# LTTE
# Read data
data <- read.delim("ltte.txt", stringsAsFactors=F, header=T)
ltte.hostile <- data$hostilephase

# LTTE model
mod <- lrm(hostilephase ~ l1lnfoodcpi + l12foreigndirectinvestmentnetinflow + l1sKARAllhosttotals + l1posGtsLTTEhosttotals +
						l2posGtsLTTEhosttotals + l1GtsLTTEhosttotals2 + l1GENERICsoctGENERICgovsentAVG + l1GENERICsoctGENERICdissentAVG,
					 	data=data, x=T, y=T, se.fit=T)
mod.sum <- robcov(mod, method='huber')
mod.sum

# Prepare stats for ROC Curve
pr.ltte <- predict(mod, newdata=data, type='fitted.ind')
pr <- prediction(pr.ltte, data$hostilephase)
auc.ltte <- round(as.numeric(performance(pr, measure='auc')@y.values),3)
perf.ltte <- performance(pr, measure='tpr', x.measure='fpr')

# LTTE Out-of-Sample
mod.ltte.out <- glm(hostilephase ~ l1lnfoodcpi + l12foreigndirectinvestmentnetinflow + l1posGtsLTTEhosttotals + l2posGtsLTTEhosttotals + l1GtsLTTEhosttotals2 + l1GENERICsoctGENERICgovsentAVG +
			l1GENERICsoctGENERICdissentAVG, family=binomial(link='logit'), data=data[data$date<as.POSIXct('2002-09-01'),])
pr.ltte <- predict(mod.ltte.out, newdata=data, type='response')

# Simulate from model, calculate confidence intervals, graph results
	# Simulate coefficients
	nsims <- 1000
	set.seed(100)
	sim1 <- sim(mod.sum, nsims)

	# Get pred probs with each var set to mean
	vars <- names(mod$coefficients)
	xdata <- data[,vars[vars!="Intercept"]]
	xdata <- cbind(1, xdata)
	names(xdata)[1] <- "Intercept"
	labs <- as.list(c("Intercept", "Food CPI", "Foreign Direct Investment", "Competing Group to Government Violence", 
										"Government to Group Repression\n1 Month Lag", "Government to Group Repression\n2 Month Lag",
										"Government to Group Repression Squared", "Sentiment Toward Government", "Sentiment Toward Group"))

	# Graph effects
	pdf("Effects from LTTE.pdf", height=11, width=8)
	layout(matrix(1:8, nrow=4, ncol=2, byrow=T))
	for(i in c(2,3,8,9,5,6,4)){
		ynew <- array(NA, c(1000, length(data$hostilephase)))
		xnew <- data[,vars[vars!="Intercept"]]
		xnew <- cbind(1, xnew)
		names(xnew)[1] <- "Intercept"
		
		xnew[,2] <- mean(xnew[,2],na.rm=T)
		xnew[,3] <- mean(xnew[,3],na.rm=T)
		xnew[,4] <- mean(xnew[,4],na.rm=T)
		xnew[,5] <- mean(xnew[,5],na.rm=T)
		xnew[,6] <- mean(xnew[,6],na.rm=T)
		xnew[,8] <- mean(xnew[,8],na.rm=T)
		xnew[,9] <- mean(xnew[,9],na.rm=T)
		
		xnew[,i] <- seq(min(xdata[,i],na.rm=T), max(xdata[,i],na.rm=T), length.out=length(data$hostilephase))
		
		xnew[,7] <- xnew[,5]^2

		for(j in 1:1000){
			coef <- as.matrix(sim1$coef[j,])
			ynew[j,] <- plogis(as.matrix(xnew) %*% coef)
		}

		eff <- apply(ynew, 2, mean)
		effse <- apply(ynew, 2, se)
		conf <- matrix(NA, nrow=length(data$hostilephase), ncol=2)
		for(k in 1:length(data$hostilephase)){
			conf[k,1] <- conf95(eff[k], effse[k], length(data$hostilephase))[1]
			conf[k,2] <- conf95(eff[k], effse[k], length(data$hostilephase))[2]
		}
		
		plot(xnew[,i], eff, type="l", lty=1, lwd=2, ylim=c(0,1), xlab=labs[[i]], ylab="Predicted Probability", main="")
		par(new=T)
		plot(xnew[,i], (conf[,1]-.02), type="l", lty=2, lwd=1, ylim=c(0,1), xaxt='n', yaxt='n', xlab="", ylab="", main="")
		par(new=T)
		plot(xnew[,i], (conf[,2]+.02), type="l", lty=2, lwd=1, ylim=c(0,1), xaxt='n', yaxt='n', xlab="", ylab="", main="")
		
	}
	dev.off()

	
# MILF
# Read data
data <- read.delim("milf.txt", stringsAsFactors=F, header=T)
milf.hostile <- data$hostilephase

# MILF model
mod <- lrm(hostilephase ~ l1umenrate + l12foreigndirectinvestmentnetinflow + l1pNPAGhosttotals + l1posGtpMILFhosttotals + l1GtpMILFhosttotals2 + l2posGtpMILFhosttotals + l2GtpMILFhosttotals2 +
			l2GENERICsoctGENERICgovsentAVG + l2GENERICsoctGENERICdissentAVG, data=data, x=T, y=T, se.fit=T)
mod.sum <- robcov(mod, method='huber')
mod.sum

# Prepare stats for ROC Curve
pr <- predict(mod, newdata=data, type='fitted.ind')
pr <- prediction(pr, data$hostilephase)
auc.milf <- round(as.numeric(performance(pr, measure='auc')@y.values),3)
perf.milf <- performance(pr, measure='tpr', x.measure='fpr')

# MILF Out-of-Sample Model
mod.milf.out <- glm(hostilephase ~ l1umenrate + l12foreigndirectinvestmentnetinflow + l1pNPAGhosttotals + l1posGtpMILFhosttotals + l1GtpMILFhosttotals2 + 
			l2posGtpMILFhosttotals + l2GtpMILFhosttotals2 + l2GENERICsoctGENERICgovsentAVG + l2GENERICsoctGENERICdissentAVG -1, 
			family=binomial(link='logit'), data=data[data$date<as.POSIXct('2003-01-01'),])
pr.milf <- predict(mod.milf.out, newdata=data, type='response')

# Simulate from model, calculate confidence intervals, graph results
	# Simulate coefficients
	nsims <- 1000
	set.seed(100)
	sim1 <- sim(mod.sum, nsims)

	# Get pred probs with each var set to mean
	vars <- names(mod$coefficients)
	xdata <- data[,vars[vars!="Intercept"]]
	xdata <- cbind(1, xdata)
	names(xdata)[1] <- "(Intercept)"
	labs <- as.list(c("Intercept", "Unemployment Rate", "Foreign Direct Investment", "Competing Group to Government Violence", 
										"Government to Group Repression\n1 Month Lag", "Government to Group Repression Squared",
										"Government to Group Repression\n2 Month Lag", "Government to Group Repression\n2 Month Lag Squared",
										"Sentiment Toward Government", "Sentiment Toward Group"))

	# Graph effects
	pdf("Effects from MILF.pdf", height=11, width=8)
	layout(matrix(1:8, nrow=4, ncol=2, byrow=T))
	for(i in c(2,3,9,10,5,7,4)){
	ynew <- array(NA, c(1000, length(data$hostilephase)))
	xnew <- data[,vars[vars!="Intercept"]]
	xnew <- cbind(1, xnew)
	names(xnew)[1] <- "Intercept"
	
	xnew[,2] <- mean(xnew[,2],na.rm=T)
	xnew[,3] <- mean(xnew[,3],na.rm=T)
	xnew[,4] <- mean(xnew[,4],na.rm=T)
	xnew[,5] <- mean(xnew[,5],na.rm=T)
	xnew[,7] <- mean(xnew[,7],na.rm=T)
	xnew[,9] <- mean(xnew[,9],na.rm=T)
	xnew[,10] <- mean(xnew[,10],na.rm=T)
	
	xnew[,i] <- seq(min(xdata[,i],na.rm=T), max(xdata[,i],na.rm=T), length.out=length(data$hostilephase))
	
	xnew[,6] <- xnew[,5]^2
	xnew[,8] <- xnew[,7]^2

	for(j in 1:1000){
		coef <- as.matrix(sim1$coef[j,])
		ynew[j,] <- plogis(as.matrix(xnew) %*% coef)
	}

	eff <- apply(ynew, 2, mean)
	effse <- apply(ynew, 2, se)
	conf <- matrix(NA, nrow=length(data$hostilephase), ncol=2)
	for(k in 1:length(data$hostilephase)){
		conf[k,1] <- conf95(eff[k], effse[k], length(data$hostilephase))[1]
		conf[k,2] <- conf95(eff[k], effse[k], length(data$hostilephase))[2]
	}
	
	plot(xnew[,i], eff, type="l", lty=1, lwd=2, ylim=c(0,1), xlab=labs[[i]], ylab="Predicted Probability", main="")
	par(new=T)
	plot(xnew[,i], (conf[,1]-.02), type="l", lty=2, lwd=1, xaxt='n', yaxt='n', ylim=c(0,1), xlab="", ylab="", main="")
	par(new=T)
	plot(xnew[,i], (conf[,2]+.02), type="l", lty=2, lwd=1, xaxt='n', yaxt='n', ylim=c(0,1), xlab="", ylab="", main="")
	}
	dev.off()
	
# Plot ROC Curves for LTTE and MILF models
	pdf("ROC Curves.pdf", height=11, width=8)
	layout(matrix(1:2, nrow=2, ncol=1, byrow=T))
	plot(perf.ltte, main='LTTE Model ROC Curve')
	text(.05, .2, labels=paste('Area under the ROC Curve:', auc.ltte, sep=' '), pos=4)
	plot(perf.milf, main='MILF Model ROC Curve')
	text(.05, .2, labels=paste('Area under the ROC Curve:', auc.milf, sep=' '), pos=4)
	dev.off()

# Plot stacked bar graphs
	ltte <- ifelse(pr.ltte>.495, 1, 0)
	milf <- ifelse(pr.milf>.5, 1, 0)
	date <- as.POSIXct(data$date)

	pdf('In and Out of sample.pdf', height=11, width=8)
	layout(matrix(1:2, nrow=2, ncol=1, byrow=T))
	# Plot LTTE
	plot(date, ltte, type='n', xaxt='n', yaxt='n', ylim=c(0,2), xlab='Date', ylab='', main='LTTE Model In-Sample and Out-of-Sample Forecasts')
	axis(side=1, at=date, labels=F)
	axis(side=1, at=date[c(seq(1, 97, by=12), 108)], tick=T, labels=seq(1998, 2007, 1))
		for(i in 1:length(date)){
			colr <- ifelse(ltte.hostile[i]==1, 'blue', 'white')
			colr <- ifelse(ltte.hostile[i]==0 & ltte[i]==1 & date[i]<as.POSIXct('2002-09-01'), 'darkred', colr)
			colr <- ifelse(ltte.hostile[i]==0 & ltte[i]==1 & date[i]>=as.POSIXct('2002-09-01'), 'darkgreen', colr)
			rect(date[i], 0, date[i+1], 1, col=colr, border='white')
			}
		for(i in 1:length(date)){
			colr <- ifelse(ltte.hostile[i]==1 & ltte[i]==1 & date[i]<as.POSIXct('2002-09-01'), 'darkred', 'white')
			colr <- ifelse(ltte.hostile[i]==1 & ltte[i]==1 & date[i]>=as.POSIXct('2002-09-01'), 'darkgreen', colr)
			rect(date[i], 1, date[i+1], 2, col=colr, border='white')
			}	
		
	# Plot MILF
	plot(date, milf, type='n', xaxt='n', yaxt='n', ylim=c(0,2), xlab='Date', ylab='', main='MILF Model In-Sample and Out-of-Sample Forecasts')
	axis(side=1, at=date, labels=F)
	axis(side=1, at=date[c(seq(1, 97, by=12), 108)], tick=T, labels=seq(1998, 2007, 1))
		for(i in 1:length(date)){
			colr <- ifelse(milf.hostile[i]==1, 'blue', 'white')
			colr <- ifelse(milf.hostile[i]==0 & milf[i]==1 & date[i]<as.POSIXct('2002-09-01'), 'darkred', colr)
			colr <- ifelse(milf.hostile[i]==0 & milf[i]==1 & date[i]>=as.POSIXct('2002-09-01'), 'darkgreen', colr)
			rect(date[i], 0, date[i+1], 1, col=colr, border='white')
			}
		for(i in 1:length(date)){
			colr <- ifelse(milf.hostile[i]==1 & milf[i]==1 & date[i]<as.POSIXct('2002-09-01'), 'darkred', 'white')
			colr <- ifelse(milf.hostile[i]==1 & milf[i]==1 & date[i]>=as.POSIXct('2002-09-01'), 'darkgreen', colr)
			rect(date[i], 1, date[i+1], 2, col=colr, border='white')
			}
		legend(date[75], .75, legend=c('Actual Hostile Phase', 'In-Sample Predicted\nHostile Phase', 'Out-of-Sample Predicted\nHostile Phase'), lty=1, lwd=2, bty='n', 
			col=c('blue', 'darkred', 'darkgreen'), cex=.75, y.intersp=2)
	dev.off()

